import cv2
import pandas as pd
import torch
import numpy as np
import requests
from PIL import Image
import matplotlib.pyplot as plt
from io import BytesIO
# Use the GPU when one is available, otherwise fall back to CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
# Load the TSV file into a DataFrame
# (Unsplash Lite dataset metadata; tab-separated, ~31 columns).
df = pd.read_csv('/kaggle/input/unsplash/photos.tsv000', sep='\t')
df.head()
| photo_id | photo_url | photo_image_url | photo_submitted_at | photo_featured | photo_width | photo_height | photo_aspect_ratio | photo_description | photographer_username | ... | photo_location_country | photo_location_city | stats_views | stats_downloads | ai_description | ai_primary_landmark_name | ai_primary_landmark_latitude | ai_primary_landmark_longitude | ai_primary_landmark_confidence | blur_hash | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | XMyPniM9LF0 | https://unsplash.com/photos/XMyPniM9LF0 | https://images.unsplash.com/uploads/1411949294... | 2014-09-29 00:08:38.594364 | t | 4272 | 2848 | 1.50 | Woman exploring a forest | michellespencer77 | ... | NaN | NaN | 2375421 | 6967 | woman walking in the middle of forest | NaN | NaN | NaN | NaN | L56bVcRRIWMh.gVunlS4SMbsRRxr |
| 1 | rDLBArZUl1c | https://unsplash.com/photos/rDLBArZUl1c | https://images.unsplash.com/photo-141633941111... | 2014-11-18 19:36:57.08945 | t | 3000 | 4000 | 0.75 | Succulents in a terrarium | ugmonk | ... | NaN | NaN | 13784815 | 82141 | succulent plants in clear glass terrarium | NaN | NaN | NaN | NaN | LvI$4txu%2s:_4t6WUj]xat7RPoe |
| 2 | cNDGZ2sQ3Bo | https://unsplash.com/photos/cNDGZ2sQ3Bo | https://images.unsplash.com/photo-142014251503... | 2015-01-01 20:02:02.097036 | t | 2564 | 1710 | 1.50 | Rural winter mountainside | johnprice | ... | NaN | NaN | 1302461 | 3428 | rocky mountain under gray sky at daytime | NaN | NaN | NaN | NaN | LhMj%NxvM{t7_4t7aeoM%2M{ozj[ |
| 3 | iuZ_D1eoq9k | https://unsplash.com/photos/iuZ_D1eoq9k | https://images.unsplash.com/photo-141487280988... | 2014-11-01 20:15:13.410073 | t | 2912 | 4368 | 0.67 | Poppy seeds and flowers | krisatomic | ... | NaN | NaN | 2890238 | 33704 | red common poppy flower selective focus phography | NaN | NaN | NaN | NaN | LSC7DirZAsX7}Br@GEWWmnoLWCnj |
| 4 | BeD3vjQ8SI0 | https://unsplash.com/photos/BeD3vjQ8SI0 | https://images.unsplash.com/photo-141700759404... | 2014-11-26 13:13:50.134383 | t | 4896 | 3264 | 1.50 | Silhouette near dark trees | jonaseriksson | ... | NaN | NaN | 8704860 | 49662 | trees during night time | NaN | NaN | NaN | NaN | L25|_:V@0hxtI=W;odae0ht6=^NG |
5 rows × 31 columns
# Keep only the three columns the rest of the notebook uses:
# photo id, direct image URL, and the human-written description.
wanted = ['photo_id', 'photo_image_url', 'photo_description']
df = df[wanted]
df.head()
| photo_id | photo_image_url | photo_description | |
|---|---|---|---|
| 0 | XMyPniM9LF0 | https://images.unsplash.com/uploads/1411949294... | Woman exploring a forest |
| 1 | rDLBArZUl1c | https://images.unsplash.com/photo-141633941111... | Succulents in a terrarium |
| 2 | cNDGZ2sQ3Bo | https://images.unsplash.com/photo-142014251503... | Rural winter mountainside |
| 3 | iuZ_D1eoq9k | https://images.unsplash.com/photo-141487280988... | Poppy seeds and flowers |
| 4 | BeD3vjQ8SI0 | https://images.unsplash.com/photo-141700759404... | Silhouette near dark trees |
# Notebook shell magic: install the embedding libraries (needs internet).
!pip install sentence_transformers transformers
from sentence_transformers import SentenceTransformer
# CLIP ViT-B/32: encodes images and text into the same 512-d vector space,
# which is what makes cross-modal (text->image) search below possible.
model1 = SentenceTransformer('clip-ViT-B-32')
Collecting sentence_transformers
Downloading sentence-transformers-2.2.2.tar.gz (85 kB)
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 86.0/86.0 kB 3.2 MB/s eta 0:00:00
Preparing metadata (setup.py) ... done
Requirement already satisfied: transformers in /opt/conda/lib/python3.7/site-packages (4.27.3)
Requirement already satisfied: tqdm in /opt/conda/lib/python3.7/site-packages (from sentence_transformers) (4.64.1)
Requirement already satisfied: torch>=1.6.0 in /opt/conda/lib/python3.7/site-packages (from sentence_transformers) (1.13.0+cpu)
Requirement already satisfied: torchvision in /opt/conda/lib/python3.7/site-packages (from sentence_transformers) (0.14.0+cpu)
Requirement already satisfied: numpy in /opt/conda/lib/python3.7/site-packages (from sentence_transformers) (1.21.6)
Requirement already satisfied: scikit-learn in /opt/conda/lib/python3.7/site-packages (from sentence_transformers) (1.0.2)
Requirement already satisfied: scipy in /opt/conda/lib/python3.7/site-packages (from sentence_transformers) (1.7.3)
Requirement already satisfied: nltk in /opt/conda/lib/python3.7/site-packages (from sentence_transformers) (3.2.4)
Requirement already satisfied: sentencepiece in /opt/conda/lib/python3.7/site-packages (from sentence_transformers) (0.1.97)
Requirement already satisfied: huggingface-hub>=0.4.0 in /opt/conda/lib/python3.7/site-packages (from sentence_transformers) (0.13.3)
Requirement already satisfied: pyyaml>=5.1 in /opt/conda/lib/python3.7/site-packages (from transformers) (6.0)
Requirement already satisfied: importlib-metadata in /opt/conda/lib/python3.7/site-packages (from transformers) (4.11.4)
Requirement already satisfied: regex!=2019.12.17 in /opt/conda/lib/python3.7/site-packages (from transformers) (2021.11.10)
Requirement already satisfied: packaging>=20.0 in /opt/conda/lib/python3.7/site-packages (from transformers) (23.0)
Requirement already satisfied: tokenizers!=0.11.3,<0.14,>=0.11.1 in /opt/conda/lib/python3.7/site-packages (from transformers) (0.13.2)
Requirement already satisfied: requests in /opt/conda/lib/python3.7/site-packages (from transformers) (2.28.2)
Requirement already satisfied: filelock in /opt/conda/lib/python3.7/site-packages (from transformers) (3.9.0)
Requirement already satisfied: typing-extensions>=3.7.4.3 in /opt/conda/lib/python3.7/site-packages (from huggingface-hub>=0.4.0->sentence_transformers) (4.4.0)
Requirement already satisfied: zipp>=0.5 in /opt/conda/lib/python3.7/site-packages (from importlib-metadata->transformers) (3.11.0)
Requirement already satisfied: six in /opt/conda/lib/python3.7/site-packages (from nltk->sentence_transformers) (1.16.0)
Requirement already satisfied: charset-normalizer<4,>=2 in /opt/conda/lib/python3.7/site-packages (from requests->transformers) (2.1.1)
Requirement already satisfied: urllib3<1.27,>=1.21.1 in /opt/conda/lib/python3.7/site-packages (from requests->transformers) (1.26.14)
Requirement already satisfied: idna<4,>=2.5 in /opt/conda/lib/python3.7/site-packages (from requests->transformers) (3.4)
Requirement already satisfied: certifi>=2017.4.17 in /opt/conda/lib/python3.7/site-packages (from requests->transformers) (2022.12.7)
Requirement already satisfied: threadpoolctl>=2.0.0 in /opt/conda/lib/python3.7/site-packages (from scikit-learn->sentence_transformers) (3.1.0)
Requirement already satisfied: joblib>=0.11 in /opt/conda/lib/python3.7/site-packages (from scikit-learn->sentence_transformers) (1.2.0)
Requirement already satisfied: pillow!=8.3.*,>=5.3.0 in /opt/conda/lib/python3.7/site-packages (from torchvision->sentence_transformers) (9.4.0)
Building wheels for collected packages: sentence_transformers
Building wheel for sentence_transformers (setup.py) ... done
Created wheel for sentence_transformers: filename=sentence_transformers-2.2.2-py3-none-any.whl size=125938 sha256=87499023c0abdb4ee19875e4b65b6808c22ca55cf7ec05eb2af087f94216a243
Stored in directory: /root/.cache/pip/wheels/83/71/2b/40d17d21937fed496fb99145227eca8f20b4891240ff60c86f
Successfully built sentence_transformers
Installing collected packages: sentence_transformers
Successfully installed sentence_transformers-2.2.2
WARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv
Downloading (…)d52eb/.gitattributes: 0%| | 0.00/690 [00:00<?, ?B/s]
Downloading (…)LIPModel/config.json: 0%| | 0.00/4.03k [00:00<?, ?B/s]
Downloading (…)CLIPModel/merges.txt: 0%| | 0.00/525k [00:00<?, ?B/s]
Downloading (…)rocessor_config.json: 0%| | 0.00/316 [00:00<?, ?B/s]
Downloading pytorch_model.bin: 0%| | 0.00/605M [00:00<?, ?B/s]
Downloading (…)cial_tokens_map.json: 0%| | 0.00/389 [00:00<?, ?B/s]
Downloading (…)okenizer_config.json: 0%| | 0.00/604 [00:00<?, ?B/s]
Downloading (…)CLIPModel/vocab.json: 0%| | 0.00/961k [00:00<?, ?B/s]
Downloading (…)859cad52eb/README.md: 0%| | 0.00/1.88k [00:00<?, ?B/s]
Downloading (…)ce_transformers.json: 0%| | 0.00/116 [00:00<?, ?B/s]
Downloading (…)cad52eb/modules.json: 0%| | 0.00/122 [00:00<?, ?B/s]
# Move the CLIP model to the selected device (GPU if available).
model1.to(device)
SentenceTransformer( (0): CLIPModel() )
import requests
from PIL import Image
from io import BytesIO
import torch

# Download the first 15k photos and encode each one with CLIP.
# Positions of URLs that failed to download/decode/encode are collected in
# faulty_indices so the matching DataFrame rows can be dropped later.
url_list = df['photo_image_url'].tolist()
faulty_indices = []   # 0-based positions of URLs that could not be processed
img_embeddings = []   # one 512-d CLIP embedding per successful URL

for i, url in enumerate(url_list[:15000]):
    try:
        # timeout prevents a dead host from hanging the whole loop
        response = requests.get(url, timeout=30)
        # treat HTTP error statuses (404, 500, ...) as faulty URLs too
        response.raise_for_status()
        image = Image.open(BytesIO(response.content))
        image_em = model1.encode(image, show_progress_bar=False)
        img_embeddings.append(image_em)
        if (i + 1) % 1000 == 0:
            print("Processed {} samples".format(i + 1))
    except Exception:
        # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
        # still propagate; any per-image failure just records the index.
        faulty_indices.append(i)
Processed 1000 samples Processed 2000 samples
/opt/conda/lib/python3.7/site-packages/PIL/Image.py:3170: DecompressionBombWarning: Image size (96012000 pixels) exceeds limit of 89478485 pixels, could be decompression bomb DOS attack. DecompressionBombWarning,
Processed 3000 samples Processed 4000 samples Processed 5000 samples
/opt/conda/lib/python3.7/site-packages/PIL/Image.py:3170: DecompressionBombWarning: Image size (99996755 pixels) exceeds limit of 89478485 pixels, could be decompression bomb DOS attack. DecompressionBombWarning,
Processed 6000 samples
/opt/conda/lib/python3.7/site-packages/PIL/Image.py:3170: DecompressionBombWarning: Image size (96768910 pixels) exceeds limit of 89478485 pixels, could be decompression bomb DOS attack. DecompressionBombWarning, /opt/conda/lib/python3.7/site-packages/PIL/Image.py:3170: DecompressionBombWarning: Image size (99991727 pixels) exceeds limit of 89478485 pixels, could be decompression bomb DOS attack. DecompressionBombWarning,
Processed 7000 samples Processed 8000 samples Processed 9000 samples
/opt/conda/lib/python3.7/site-packages/PIL/Image.py:3170: DecompressionBombWarning: Image size (143040000 pixels) exceeds limit of 89478485 pixels, could be decompression bomb DOS attack. DecompressionBombWarning, /opt/conda/lib/python3.7/site-packages/PIL/Image.py:3170: DecompressionBombWarning: Image size (94212096 pixels) exceeds limit of 89478485 pixels, could be decompression bomb DOS attack. DecompressionBombWarning,
Processed 10000 samples
/opt/conda/lib/python3.7/site-packages/PIL/Image.py:3170: DecompressionBombWarning: Image size (121500000 pixels) exceeds limit of 89478485 pixels, could be decompression bomb DOS attack. DecompressionBombWarning,
Processed 11000 samples
/opt/conda/lib/python3.7/site-packages/PIL/Image.py:3170: DecompressionBombWarning: Image size (107424768 pixels) exceeds limit of 89478485 pixels, could be decompression bomb DOS attack. DecompressionBombWarning, /opt/conda/lib/python3.7/site-packages/PIL/Image.py:3170: DecompressionBombWarning: Image size (147015000 pixels) exceeds limit of 89478485 pixels, could be decompression bomb DOS attack. DecompressionBombWarning, /opt/conda/lib/python3.7/site-packages/PIL/Image.py:3170: DecompressionBombWarning: Image size (107184040 pixels) exceeds limit of 89478485 pixels, could be decompression bomb DOS attack. DecompressionBombWarning,
Processed 12000 samples
/opt/conda/lib/python3.7/site-packages/PIL/Image.py:3170: DecompressionBombWarning: Image size (146784000 pixels) exceeds limit of 89478485 pixels, could be decompression bomb DOS attack. DecompressionBombWarning,
Processed 13000 samples
/opt/conda/lib/python3.7/site-packages/PIL/Image.py:3170: DecompressionBombWarning: Image size (90671520 pixels) exceeds limit of 89478485 pixels, could be decompression bomb DOS attack. DecompressionBombWarning,
Processed 14000 samples Processed 15000 samples
# Number of successfully embedded images (15000 attempted minus failures).
len(img_embeddings)
14992
# Positions of the URLs that failed and were skipped during embedding.
faulty_indices
[163, 1505, 3097, 4506, 11488, 13011, 13566, 14537]
import numpy as np

# Persist all embeddings as a single (n, 512) array for later reuse.
image_embeddings = np.array(img_embeddings)
np.save('image_features1.npy', image_embeddings)

# Align the metadata with the embeddings: take the 15k attempted rows,
# drop the ones whose download failed, then renumber positionally so
# row i of df2 corresponds to img_embeddings[i].
df2 = df.head(15000).drop(labels=faulty_indices, axis=0).reset_index()
df2['img_embeddings'] = img_embeddings
df2.to_csv('final_df_with_embeddings.csv')
df2.head()
| index | photo_id | photo_image_url | photo_description | img_embeddings | |
|---|---|---|---|---|---|
| 0 | 0 | XMyPniM9LF0 | https://images.unsplash.com/uploads/1411949294... | Woman exploring a forest | [-0.28357166, 0.6603953, 0.15704419, 0.1067858... |
| 1 | 1 | rDLBArZUl1c | https://images.unsplash.com/photo-141633941111... | Succulents in a terrarium | [-0.48558712, 0.41402024, -0.121365234, 0.1435... |
| 2 | 2 | cNDGZ2sQ3Bo | https://images.unsplash.com/photo-142014251503... | Rural winter mountainside | [-0.31590873, 0.6827722, 0.04307574, 0.2806446... |
| 3 | 3 | iuZ_D1eoq9k | https://images.unsplash.com/photo-141487280988... | Poppy seeds and flowers | [0.0051131845, 0.17519622, -0.23967189, -0.345... |
| 4 | 4 | BeD3vjQ8SI0 | https://images.unsplash.com/photo-141700759404... | Silhouette near dark trees | [-0.2817433, 0.017431736, -0.17361663, 0.06159... |
def cosine_sim(a, b):
    """Return the cosine similarity dot(a, b) / (||a|| * ||b||) of two 1-D vectors."""
    norm_a = np.linalg.norm(a)
    norm_b = np.linalg.norm(b)
    return np.dot(a, b) / (norm_a * norm_b)
# Text-to-image search: embed the query with CLIP's text tower and display
# the 3 most similar photos from the indexed collection.
example_query = np.array(model1.encode("roads inside the woods", show_progress_bar=True))
cosine_sims = [cosine_sim(example_query, emb) for emb in img_embeddings]
n = 3
top_n_indices = np.argsort(-np.array(cosine_sims))[:n]
selected_rows = df2.loc[top_n_indices.tolist(), 'photo_image_url']
for url in selected_rows:
    resp = requests.get(url)
    plt.imshow(Image.open(BytesIO(resp.content)))
    plt.show()
Batches: 0%| | 0/1 [00:00<?, ?it/s]
# Retrieve the top-3 photos for the text query "dogs in nature".
example_query = np.array(model1.encode("dogs in nature", show_progress_bar=True))
cosine_sims = [cosine_sim(example_query, emb) for emb in img_embeddings]
n = 3
# argsort on the negated scores -> indices in descending-similarity order
top_n_indices = np.argsort(-np.array(cosine_sims))[:n]
selected_rows = df2.loc[top_n_indices.tolist(), 'photo_image_url']
for url in selected_rows:
    payload = requests.get(url).content
    plt.imshow(Image.open(BytesIO(payload)))
    plt.show()
Batches: 0%| | 0/1 [00:00<?, ?it/s]
import numpy as np

# Top-3 photo matches for the text query "happy wife".
example_query = np.array(model1.encode("happy wife", show_progress_bar=True))
cosine_sims = [cosine_sim(example_query, emb) for emb in img_embeddings]
n = 3
top_n_indices = np.argsort(-np.array(cosine_sims))[:n]
selected_rows = df2.loc[top_n_indices.tolist(), 'photo_image_url']
for url in selected_rows:
    plt.imshow(Image.open(BytesIO(requests.get(url).content)))
    plt.show()
Batches: 0%| | 0/1 [00:00<?, ?it/s]
# Top-3 photo matches for the text query "red flower".
example_query = np.array(model1.encode("red flower", show_progress_bar=True))
cosine_sims = [cosine_sim(example_query, vec) for vec in img_embeddings]
n = 3
top_n_indices = np.argsort(-np.array(cosine_sims))[:n]
selected_rows = df2.loc[top_n_indices.tolist(), 'photo_image_url']
for url in selected_rows:
    raw = requests.get(url).content
    plt.imshow(Image.open(BytesIO(raw)))
    plt.show()
Batches: 0%| | 0/1 [00:00<?, ?it/s]
# Image-to-image search: embed a local sample photo with CLIP and find the
# visually closest photos in the indexed collection.
sample_image1 = Image.open("/kaggle/input/sampleinput/alice-triquet-HeEJU3nrg_0-unsplash.jpg")
sample_image2 = Image.open("/kaggle/input/sampleinput/jerry-zhang-SJGiS1JzUCc-unsplash.jpg")
sample_image3 = Image.open("/kaggle/input/sampleinput/keith-tanner-f-U6iHYUzO8-unsplash.jpg")

sample_image_1_embed = np.array(model1.encode(sample_image1, show_progress_bar=True))
cosine_sims = [cosine_sim(sample_image_1_embed, emb) for emb in img_embeddings]
n = 3
top_n_indices = np.argsort(-np.array(cosine_sims))[:n]
selected_rows = df2.loc[top_n_indices.tolist(), 'photo_image_url']
print("inserted image:")
plt.imshow(sample_image1)
plt.show()
print("similar images by CLIP:")
for url in selected_rows:
    plt.imshow(Image.open(BytesIO(requests.get(url).content)))
    plt.show()
Batches: 0%| | 0/1 [00:00<?, ?it/s]
inserted image:
similar images by CLIP:
# Same image-to-image search, now seeded with the second sample photo.
# (Variable name sample_image_1_embed is reused as in the original cells.)
sample_image_1_embed = np.array(model1.encode(sample_image2, show_progress_bar=True))
cosine_sims = [cosine_sim(sample_image_1_embed, emb) for emb in img_embeddings]
n = 3
top_n_indices = np.argsort(-np.array(cosine_sims))[:n]
selected_rows = df2.loc[top_n_indices.tolist(), 'photo_image_url']
print("inserted image:")
plt.imshow(sample_image2)
plt.show()
print("similar images by CLIP:")
for url in selected_rows:
    body = requests.get(url).content
    plt.imshow(Image.open(BytesIO(body)))
    plt.show()
Batches: 0%| | 0/1 [00:00<?, ?it/s]
inserted image:
similar images by CLIP:
# Same image-to-image search, seeded with the third sample photo.
sample_image_1_embed = np.array(model1.encode(sample_image3, show_progress_bar=True))
cosine_sims = [cosine_sim(sample_image_1_embed, emb) for emb in img_embeddings]
n = 3
top_n_indices = np.argsort(-np.array(cosine_sims))[:n]
selected_rows = df2.loc[top_n_indices.tolist(), 'photo_image_url']
print("inserted image:")
plt.imshow(sample_image3)
plt.show()
print("similar images by CLIP:")
for url in selected_rows:
    plt.imshow(Image.open(BytesIO(requests.get(url).content)))
    plt.show()
Batches: 0%| | 0/1 [00:00<?, ?it/s]
inserted image:
similar images by CLIP:
# Multimodal query: add the embedding of the text "mountains" to an image
# embedding and search with the (roughly normalized) sum.
sample_image1 = Image.open("/kaggle/input/sampleinput/alice-triquet-HeEJU3nrg_0-unsplash.jpg")
sample_image_1_embed = np.array(model1.encode(sample_image1, show_progress_bar=True))
example_query_embed = np.array(model1.encode("mountains", show_progress_bar=True))
result = example_query_embed + sample_image_1_embed
# Mean-shift then scale by the max. NOTE: cosine similarity is invariant to
# positive scaling, so only the mean shift actually affects the ranking.
arr_normalized = (result - np.mean(result)) / (np.max(result))
cosine_sims = [cosine_sim(arr_normalized, emb) for emb in img_embeddings]
n = 3
top_n_indices = np.argsort(-np.array(cosine_sims))[:n]
selected_rows = df2.loc[top_n_indices.tolist(), 'photo_image_url']
print("inserted image:")
plt.imshow(sample_image1)
plt.show()
print("similar images by CLIP:")
for url in selected_rows:
    plt.imshow(Image.open(BytesIO(requests.get(url).content)))
    plt.show()
Batches: 0%| | 0/1 [00:00<?, ?it/s]
Batches: 0%| | 0/1 [00:00<?, ?it/s]
inserted image:
similar images by CLIP:
# Multimodal query: sample image 3 combined with the text "dark night".
# (Removed an unused `query = 'happy life'` local that did not match the
# query actually encoded below and only misled the reader.)
sample_image_3_embed = np.array(model1.encode(sample_image3, show_progress_bar=True))
example_query_embed = np.array(model1.encode("dark night", show_progress_bar=True))
result = example_query_embed + sample_image_3_embed
# Mean-shift then scale by the max; the scaling is a no-op for cosine ranking.
arr_normalized = (result - np.mean(result)) / (np.max(result))
cosine_sims = [cosine_sim(arr_normalized, embedding) for embedding in img_embeddings]
n = 3
top_n_indices = np.argsort(-np.array(cosine_sims))[:n]
selected_rows = df2.loc[top_n_indices.tolist(), 'photo_image_url']
print("inserted image:")
plt.imshow(sample_image3)
plt.show()
print("similar images by CLIP:")
for url in selected_rows:
    response = requests.get(url)
    img = Image.open(BytesIO(response.content))
    plt.imshow(img)
    plt.show()
Batches: 0%| | 0/1 [00:00<?, ?it/s]
Batches: 0%| | 0/1 [00:00<?, ?it/s]
inserted image:
similar images by CLIP:
# Sanity-check that the video decodes: seek to frame 500 and read it.
cap = cv2.VideoCapture('/kaggle/input/positano/positano.mp4')
cap.set(cv2.CAP_PROP_POS_FRAMES, 500)
ret, frame = cap.read()
cap.release()  # previously leaked
# Only convert when the read succeeded: cv2.read() returns (False, None)
# on failure and cvtColor raises on a None frame.
if ret:
    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

# Embed an inline player for the hosted copy of the video.
url = 'https://drive.google.com/file/d/1ZWJJuH6WdH5y56mZClCfcoF-lMPM8k9k/view'
from IPython.display import HTML
HTML(f"""<video src={url} autoplay loop width=600 controls/>""")
def video_duration(path):
    """Return the duration of the video at *path* in seconds.

    Computed as total frame count / frames-per-second as reported by OpenCV.
    Raises ValueError when the container reports zero FPS (e.g. the file
    could not be opened), instead of a bare ZeroDivisionError.
    """
    video = cv2.VideoCapture(path)
    try:
        fps = video.get(cv2.CAP_PROP_FPS)
        frame_count = video.get(cv2.CAP_PROP_FRAME_COUNT)
    finally:
        video.release()  # capture was leaked in the previous revision
    if fps == 0:
        raise ValueError(f"could not read FPS from {path!r}")
    return frame_count / fps
a=video_duration('/kaggle/input/positano/positano.mp4')
def video_frame(path, timestamp):
    """Return the RGB frame of the video at *timestamp* seconds, or None.

    Opens the video fresh on each call, seeks by milliseconds, and converts
    OpenCV's BGR output to RGB. Returns None when no frame could be read
    (e.g. timestamp is past the end of the video).
    """
    video = cv2.VideoCapture(path)
    try:
        video.set(cv2.CAP_PROP_POS_MSEC, timestamp * 1000)
        ret, frame = video.read()
    finally:
        video.release()  # capture was leaked in the previous revision
    if not ret:
        # Bug fix: the previous revision called cvtColor *before* checking
        # ret, so a failed read (frame is None) crashed instead of
        # returning None as the final `return frame if ret else None`
        # intended. Also dropped an unused fps lookup.
        return None
    return cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
# Encode one frame per whole second of the video with CLIP.
# NOTE(review): downstream cells use the *position* in video_embeddings as
# the timestamp in seconds, so this loop must produce exactly one entry per
# second. If video_frame ever returns None (failed read), Image.fromarray(f)
# will raise here — TODO: handle that case without breaking the
# index==timestamp coupling.
video_embeddings=[]
for i in range(int(a)):
    f=video_frame('/kaggle/input/positano/positano.mp4',i)
    # convert_to_tensor=True: keep the embedding as a torch tensor
    frame_embedding = model1.encode(Image.fromarray(f),show_progress_bar=False, convert_to_tensor=True)
    video_embeddings.append(frame_embedding)
len(video_embeddings)
151
# Get the text embedding and rank the video frames against it.
text = "beach"
text_embedding = model1.encode(text, convert_to_tensor=True)
# Bug fix: `cosine_similarity` (the sklearn helper) was never imported or
# defined in this notebook, so this cell raised NameError. Compute the
# cosine similarity directly with torch instead (torch is imported at top).
cosine_sims = [
    torch.nn.functional.cosine_similarity(
        text_embedding.reshape(1, 512), frame.reshape(1, 512)
    ).item()
    for frame in video_embeddings
]
# Frame positions (== seconds) ordered from most to least similar.
sorted_frames = np.argsort(cosine_sims)[::-1]
Batches: 0%| | 0/1 [00:00<?, ?it/s]
# Display the 7 best-matching frames (frame position == timestamp in seconds).
for rank in range(7):
    second = sorted_frames[rank]
    print(second)
    rgb = video_frame('/kaggle/input/positano/positano.mp4', second)
    plt.imshow(rgb)
    plt.show()
55
54
53
60
61
62
27
text = "church"
text_embedding = model1.encode(text, convert_to_tensor=True)
#frame_embeddings = np.vstack(frame_embeddings)
# Calculate the cosine similarity between the text embedding and each frame embedding
cosine_sims = [cosine_similarity(text_embedding.reshape(1, 512),frame.reshape(1, 512))[0,0] for frame in video_embeddings]
#print(len(cosine_sims))
# Sort the frames by similarity
sorted_frames = np.argsort(cosine_sims)[::-1]
# Show the 7 frames most similar to the query, best first.
for rank in range(7):
    second = sorted_frames[rank]
    print(second)
    plt.imshow(video_frame('/kaggle/input/positano/positano.mp4', second))
    plt.show()
Batches: 0%| | 0/1 [00:00<?, ?it/s]
112
111
108
113
110
109
107
text = "village"
text_embedding = model1.encode(text, convert_to_tensor=True)
#frame_embeddings = np.vstack(frame_embeddings)
# Calculate the cosine similarity between the text embedding and each frame embedding
cosine_sims = [cosine_similarity(text_embedding.reshape(1, 512),frame.reshape(1, 512))[0,0] for frame in video_embeddings]
#print(len(cosine_sims))
# Sort the frames by similarity
sorted_frames = np.argsort(cosine_sims)[::-1]
# Display the 7 highest-ranked frames for this query.
for rank in range(7):
    ts = sorted_frames[rank]
    print(ts)
    img_rgb = video_frame('/kaggle/input/positano/positano.mp4', ts)
    plt.imshow(img_rgb)
    plt.show()
Batches: 0%| | 0/1 [00:00<?, ?it/s]
6
12
38
126
11
10
125
text = "boat"
text_embedding = model1.encode(text, convert_to_tensor=True)
#frame_embeddings = np.vstack(frame_embeddings)
# Calculate the cosine similarity between the text embedding and each frame embedding
cosine_sims = [cosine_similarity(text_embedding.reshape(1, 512),frame.reshape(1, 512))[0,0] for frame in video_embeddings]
#print(len(cosine_sims))
# Sort the frames by similarity
sorted_frames = np.argsort(cosine_sims)[::-1]
# Render the 7 top-ranked frames for this query, best match first.
for rank in range(7):
    second = sorted_frames[rank]
    print(second)
    frame_rgb = video_frame('/kaggle/input/positano/positano.mp4', second)
    plt.imshow(frame_rgb)
    plt.show()
Batches: 0%| | 0/1 [00:00<?, ?it/s]
28
30
31
29
56
58
57